In this notebook, we will visualize the graph model results produced by the Terra notebook.
Load Functions
Code
# Notebook setup: imports, working-directory selection, and RNG seeding.
#
# The cell had been collapsed onto a single line, which turned everything
# after `import os` into a trailing comment; the statements are restored here.
import csv
import math
import multiprocessing
import os
import pickle
import re
import time

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import scipy.sparse as sp
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import umap.umap_ as umap
from joblib import Parallel, delayed
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm
# import torch_geometric

# local vs UGER
if os.path.expanduser('~') in ["/Users/youyun", "/Users/youyunzheng"]:
    # on a local mac, the home directory is usually at '/Users/[username]'
    workdir = os.path.expanduser('~') + "/Documents/HMS/PhD/beroukhimlab/broad_mount/"
else:
    # on dipg or uger, the home directory is usually at '/home/unix/[username]'
    workdir = "/xchip/beroukhimlab/"

# Fixed seeds so that torch and numpy random draws are repeatable across runs.
torch.manual_seed(123)
np.random.seed(123)
Load Data
Code
# Adapted from https://github.com/weihua916/powerful-gnns/blob/master/util.py


class S2VGraph(object):
    """Container pairing a networkx graph with its label and GNN input slots.

    Attributes:
        g: a networkx graph
        label: an integer graph label
        node_tags: a list of integer node tags
        node_features: a torch float tensor, one-hot representation of the
            tag that is used as input to neural nets (initialised to 0 here)
        edge_mat: a torch long tensor, contain edge list, will be used to
            create torch sparse tensor (initialised to 0 here)
        neighbors: list of neighbors (without self-loop), initially empty
        max_neighbor: initialised to 0 here
    """

    def __init__(self, g, label, node_tags=None, node_features=None):
        self.label = label
        self.g = g
        self.node_tags = node_tags
        self.neighbors = []
        # NOTE(review): the `node_features` argument is accepted but not
        # stored; the attribute always starts at 0, as in the original code.
        # Presumably it is filled in by later processing -- confirm.
        self.node_features = 0
        self.edge_mat = 0
        self.max_neighbor = 0


def _read_graph(graph_file):
    """Unpickle and return the object stored in `graph_file`."""
    # SECURITY: pickle can execute arbitrary code while loading; only use
    # this on graph files that come from a trusted source.
    with open(graph_file, 'rb') as handle:
        return pickle.load(handle)


def _s2v_from_graph(G):
    """Build `(S2VGraph, graph_label)` from an already-loaded graph."""
    # The label is the first node's name with a trailing `_<digits>` removed.
    graph_label = re.sub('_[0-9]*$', '', list(G.nodes)[0])
    # relabel nodes to integer (the part of the name after the last `_`)
    G = nx.relabel_nodes(G, {n: int(re.sub('.*_', '', n)) for n in G.nodes})
    # use fake label of 0 for everything
    return S2VGraph(G, label=0), graph_label


def _create_s2v_if_has_edges(graph_file):
    """Return `create_s2v`'s result, or None when the graph has no edges."""
    G = _read_graph(graph_file)
    if G.number_of_edges() > 0:
        return _s2v_from_graph(G)
    return None


def create_s2v(graph_file):
    """Load one pickled graph and wrap it as an S2VGraph.

    Args:
        graph_file: path to a pickle file containing a networkx graph whose
            node names are strings that end in `_<integer>`.

    Returns:
        A tuple `(S2VGraph, graph_label)`; the S2VGraph carries the relabelled
        graph and a placeholder label of 0, and `graph_label` is the string
        derived from the first node's name.

    Raises:
        IndexError: if the graph has no nodes.
        ValueError: if a node name does not end in an integer.
    """
    return _s2v_from_graph(_read_graph(graph_file))


def load_data_from_nx(graph_files):
    """Load many pickled graphs in parallel, skipping graphs without edges.

    Each file is unpickled once, inside a worker; graphs with zero edges are
    dropped. Input order is preserved among the graphs that are kept.

    Args:
        graph_files: iterable of paths to pickled networkx graphs.

    Returns:
        A tuple `(g_list, label_list)` of equal-length tuples holding the
        S2VGraph objects and their string labels. Both are empty tuples when
        there are no input files or no graph has any edge.
    """
    graph_files = list(graph_files)
    if not graph_files:
        return (), ()

    num_cores = multiprocessing.cpu_count()
    results = Parallel(n_jobs=num_cores)(
        delayed(_create_s2v_if_has_edges)(graph_file)
        for graph_file in graph_files
    )
    kept = [result for result in results if result is not None]
    if not kept:
        # zip(*[]) would yield nothing and break the two-way unpacking below.
        return (), ()

    g_list, label_list = zip(*kept)
    return g_list, label_list
Load Data
The final embedding that we will be using can be downloaded from this Google Cloud path – gs://fc-secure-4c1a93af-a9bd-458a-883f-8e317283e79f/models/UGformerV2_UnSup_lr0.0005_bs16_ep30_ff1024_nn5_sd512_do0.5_nl2_nt2_graph_embeddings.csv. This has already been done, and the file is located at /xchip/beroukhimlab/youyun/complexSV/data/TCGA/graph_embedding/UGformerV2_UnSup_lr0.0005_bs16_ep30_ff1024_nn5_sd512_do0.5_nl2_nt2_graph_embeddings.csv.